Load Libraries

# install.packages('knitr', repos = c('http://rforge.net', 'http://cran.rstudio.org'), type = 'source')
# install.packages("ggplot2")
# install.packages("gganimate")
# install.packages("gifski")
# install.packages("png")
# install.packages("transformr")
library("knitr")
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.4.4
library("gganimate")
## Warning: package 'gganimate' was built under R version 3.4.4
library("gifski")
## Warning: package 'gifski' was built under R version 3.4.4
library("png")
## Warning: package 'png' was built under R version 3.4.4
library("transformr")
## Warning: package 'transformr' was built under R version 3.4.4

Dancing Landscape

Sources of Dancing Landscapes

  • Complex behavior
    • When factors that create landscapes are interdependent
    • When factors that create landscapes change over time

Examples of Dancing Landscapes

  • The stock market
  • Customer demand by product tag over time

Demonstration

f2D <- function(x, t) {
  return (cos(x) * sin(t * pi) - (abs(x)/10) + x/20)
}

par(mfrow=c(3, 3))
plot(f2D(seq(-10, 10, 0.1), 0.00), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.17), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.33), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.50), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.67), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.83), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.00), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.17), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.33), type="l")

par(mfrow=c(1, 1))

Assemble Sample dataframe

constructData2D <- function (func, xMin, xMax, precision, timeMin, timeMax)
{
  # Interval is bounded by gganimate - this just works, do not ask questions
  timeInterval = ((timeMax - timeMin) / 50) + 0.001
  
  data <- data.frame(X=0, Y=0, Time=0)
  i = 1
  for (t in seq(timeMin, timeMax, timeInterval))
  {
    for (x in seq(xMin, xMax, precision))
    {
      data[i,]$X = x
      data[i,]$Y = func(x, t)
      data[i,]$Time = t
      i = i + 1
    }
  }
  
  return (data)
}

timeData = constructData2D(f2D, -10, 10, 0.2, 0, 5)

## Output the dataframe and the graph it represents
str(timeData)
## 'data.frame':    5050 obs. of  3 variables:
##  $ X   : num  -10 -9.8 -9.6 -9.4 -9.2 -9 -8.8 -8.6 -8.4 -8.2 ...
##  $ Y   : num  -1.5 -1.47 -1.44 -1.41 -1.38 -1.35 -1.32 -1.29 -1.26 -1.23 ...
##  $ Time: num  0 0 0 0 0 0 0 0 0 0 ...
ggplot(timeData, aes(X, Y, group=Time)) +
  geom_path()

Animation of function over time

visualizeFunction <- function(dataframe)
{
  if (is.null(dataframe$OptimalX))
  {
    ggplot(dataframe, aes(X, Y, group=Time)) +
      geom_path() +
      transition_states(Time, transition_length=1, state_length=1, wrap=F) +
      labs(title = "Time: {closest_state}") +
      enter_fade() + 
      exit_fade()
  }
  else
  {
    ggplot(dataframe, aes(X, Y, group=Time)) +
      geom_path() +
      geom_point(x=dataframe$OptimalX, y=dataframe$OptimalY, colour="hotpink", size=4) +
      transition_states(Time, transition_length=1, state_length=1, wrap=F) +
      labs(title = "Time: {closest_state}") +
      enter_fade() + 
      exit_fade()
  }
}

visualizeFunction(timeData)

Setup Hill Climbing

The function updates the OptimalX and OptimalY columns of a dataframe containing X, Y, and Time to include the best results from hill climbing optimization function. It attempts to find the maxima.

ticksPerUnitTime controls how fast the hill climbing runs compared to the function. For every increase in the time value of 1.0, the hill climbing will update its value ticksPerUnitTime times.

costFunction is the function being optimized, of the form y = costFunction(x, time).

At time = 0, the algorithm will use a single random guess. That is, do not expect the first value to be a good one. Setting startX can control this instead of a random value.

windowSize controls how far the hill climb algorithm can guess for each guess. It will look up to (windowSize / 2) left or right from its current best

hillClimb <- function(timeDataFrame, ticksPerUnitTime, costFunction, startX = NULL, windowSize = 1)
{
  # Setup variables
  currentTime = min(timeDataFrame$Time)
  currentTick = 1
  currentSubset = timeDataFrame[timeDataFrame$Time == currentTime,]
  bestX = startX
  if (is.null(bestX))
  {
    bestX = runif(1, min(currentSubset$X), max(currentSubset$X))
  }
  bestY = costFunction(bestX, currentTime)
  width = windowSize / 2

  # Record optimal x and y
  timeDataFrame[timeDataFrame$Time == currentTime,]$OptimalX = rep(bestX, nrow(currentSubset))
  timeDataFrame[timeDataFrame$Time == currentTime,]$OptimalY = rep(bestY, nrow(currentSubset))

  # Iterate through time
  nextSubset = timeDataFrame[timeDataFrame$Time > currentTime,]
  while (nrow(nextSubset) > 0)
  {
    # Calculate number of hill climb ticks (guesses) to do
    nextTime = min(nextSubset$Time)
    iterations = floor(nextTime * ticksPerUnitTime) - currentTick
    
    # Update variables
    currentTime = nextTime
    currentSubset = timeDataFrame[timeDataFrame$Time == currentTime,]
    getOption <- function() { 
      return (runif(1, 
                    max(bestX - width, min(currentSubset$X)), 
                    min(bestX + width, max(currentSubset$X))))
    }
    bestY = costFunction(bestX, currentTime)

    # Actually do hill climbing
    if (iterations >= 1)
    for (i in 1:iterations)
    {
      currentTick = currentTick + 1
      testX = getOption()
      testY = costFunction(testX, currentTime)
      if (testY > bestY)
      {
        bestX = testX
        bestY = testY
      }
    }
    
    # Record new optimal x and y
    timeDataFrame$OptimalX[timeDataFrame$Time == currentTime] = rep(bestX, nrow(currentSubset))
    timeDataFrame$OptimalY[timeDataFrame$Time == currentTime] = rep(bestY, nrow(currentSubset))
    
    # Load next time subset
    nextSubset = timeDataFrame[timeDataFrame$Time > currentTime,]
  }
  
  return (timeDataFrame)
}

Run and Visualize Hill Climbing

# Approximately 1 guess per tick
timeData = hillClimb(timeData, 9, f2D, startX = -10)
## Warning in `[<-.data.frame`(`*tmp*`, timeDataFrame$Time == currentTime, :
## provided 4 variables to replace 3 variables

## Warning in `[<-.data.frame`(`*tmp*`, timeDataFrame$Time == currentTime, :
## provided 4 variables to replace 3 variables
visualizeFunction(timeData)
## Warning: Removed 101 rows containing missing values (geom_point).
## Warning: Removed 101 rows containing missing values (geom_point).

## Warning: Removed 101 rows containing missing values (geom_point).
# Approximately 3 guesses per tick
timeData = hillClimb(timeData, 28, f2D, startX = -10)
visualizeFunction(timeData)

# Approximately 0.3 guesses per tick
timeData = hillClimb(timeData, 3, f2D, startX = -10)
visualizeFunction(timeData)

Takeaways

NetLogo in R

This next section sets up an agent-based model that exhibits complexity. We will use it to for the following workshop, where you will attempt to optimize over the output from the model.

The model is a square grid of cells, wrapping on both axes, with one agent per cell. Each agent has a value that it can adjust up or down. An agent wants to have a higher value than three of its neighbors, but does not want to be the highest. Agents will: * Subtract 3 if they are the highest * Add 1 if they are below average * Add 1 if they are above average and not the second-highest

Setup Utility Functions

# From https://stackoverflow.com/questions/2453326/fastest-way-to-find-second-third-highest-lowest-value-in-vector-or-column
maxN <- function(x, N=2){
  len <- length(x)
  if(N>len){
    warning('N greater than length(x).  Setting N=length(x)')
    N <- length(x)
  }
  sort(x,partial=len-N+1)[len-N+1]
}
# End sourced code

getAgentChange <- function(agentValue, neighborValues)
{
  neighborValues = c(neighborValues, agentValue)
  if (agentValue >= max(neighborValues))
    return (-3)
  if (agentValue < mean(neighborValues))
    return (1)
  if (agentValue >= mean(neighborValues) && agentValue < maxN(neighborValues, 2))
    return (1)
  return (0)
}

Setup grid

buildWorld <- function(height, width, startingValues = NULL)
{
  if (is.null(startingValues))
  {
    startingValues = rep(100, height * width)
  }
  densityValue = 1 / (height * width)
  sumStartValues = sum(startingValues)
  world <- data.frame(X=0, Y=0, Value=startingValues[1], Density = densityValue)
  i = 1
  for (x in 1:width)
  {
    for (y in 1:height)
    {
      world[i,]$X = x
      world[i,]$Y = y
      world[i,]$Value = startingValues[i]
      world[i,]$Density = startingValues[i] / sumStartValues
      i = i + 1
    }
  }
  
  return (world)
}

world = buildWorld(17, 17, seq(100, 128.9, 0.1))

ggplot(world, aes(X, Y, z = Density)) +
  geom_raster(aes(fill = Density)) +
  geom_contour(colour = "white")

Setup Changing Values

updateWorld <- function(world, updateFunction, orthogonalOnly = T)
{
  newWorld = buildWorld(max(world$X), max(world$Y))
  for (x in 1:max(world$X))
  {
    xLower = x - 1
    if (xLower == 0) xLower = max(world$X)
    xUpper = x + 1
    if (xUpper == max(world$X)) xUpper = 1
    
    for (y in 1:max(world$Y))
    {
      yLower = y - 1
      if (yLower == 0) yLower = max(world$Y)
      yUpper = y + 1
      if (yUpper == max(world$Y)) yUpper = 1
      
      neighborValues = c()
      neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == y,]$Value)
      neighborValues = c(neighborValues, world[world$X == xLower & world$Y == y,]$Value)
      neighborValues = c(neighborValues, world[world$X == x & world$Y == yUpper,]$Value)
      neighborValues = c(neighborValues, world[world$X == x & world$Y == yLower,]$Value)

      
      if (!orthogonalOnly)
      {
        neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yUpper,]$Value)
        neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yLower,]$Value)
        neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yUpper,]$Value)
        neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yLower,]$Value)
      }

      currentValue = world[newWorld$X == x & newWorld$Y == y,]$Value
      newWorld[newWorld$X == x & newWorld$Y == y,]$Value = currentValue + updateFunction(currentValue, neighborValues)
    }
  }
  
  newWorld$Density = newWorld$Value / sum(newWorld$Value)
  
  return (newWorld)
}

world2 = updateWorld(world, getAgentChange)
ggplot(world2, aes(X, Y, z = Density)) +
  geom_raster(aes(fill = Density)) +
  geom_contour(colour = "white")

Construct time data

lastStep = buildWorld(17, 17, seq(100, 128.9, 0.1))
lastStep$Time = rep(1, nrow(lastStep))
finalData = NULL

recordSteps = 25
totalSteps = 50
startRecording = totalSteps - recordSteps
addOptimalPoints = T

for (t in 2:totalSteps)
{
  newData = updateWorld(lastStep, getAgentChange)
  newData$Time = rep(t, nrow(newData))
  if (addOptimalPoints)
  {
    bestX = newData[newData$Value == max(newData$Value),]$X[1]
    bestY = newData[newData$Value == max(newData$Value),]$Y[1]
    newData$OptimalX = rep(bestX, nrow(newData))
    newData$OptimalY = rep(bestY, nrow(newData))
  }
  lastStep = newData
  
  if (t > startRecording && t < startRecording + recordSteps)
  {
    if(is.null(finalData))
      finalData = newData
    else
      finalData = rbind(finalData, newData)
  }
}

Prepare animated visualizer

visualizeFunction <- function(dataframe)
{
  if (is.null(dataframe$OptimalX))
  {
    ggplot(dataframe, aes(X, Y, z = Density)) +
      geom_raster(aes(fill = Density)) +
      geom_contour(colour = "white", bins=7) + 
      transition_states(Time, transition_length = 10, state_length = 0, wrap=F) + 
      labs(title = "Time: {closest_state}") + 
      enter_fade() + 
      exit_fade() + 
      ease_aes("linear")
  }
  else
  {
    ggplot(dataframe, aes(X, Y, z = Density)) +
      geom_raster(aes(fill = Density)) +
      geom_contour(colour = "white", bins=7) + 
      geom_point(x=dataframe$OptimalX, y=dataframe$OptimalY, colour="hotpink", size=4) +
      transition_states(Time, transition_length = 10, state_length = 0, wrap=F) + 
      labs(title = "Time: {closest_state}") + 
      enter_fade() + 
      exit_fade() + 
      ease_aes("linear")
  }
}

Visualize

visualizeFunction(finalData)